Data
- RNAseq samples from cortical tissue
## ---- load cleaned RNA-seq count file and design file ready
load("~/Documents/code/short-term-diet/01_EdgeR_glmQLFit/data/01_data_cleaning.RData")
source("~/Documents/code/01_function/my_edgeR.R") # tidyverse library is loaded in my_edgeR.R
out_prefix<-"Cortex.RMoutlier"
out_analysis <- "./analysis/"
out_figure <- "./figure/"
data.raw=data.raw.C
data.design=data.design.C
group=group.C
library(tidyverse)
#remove outlier sample CD.2weeks.Sed.C_17989C and get rid of all running groups (pre-determined in Cortex.Rmd)
data.raw <- data.raw %>% select(-contains("17989"),-contains("Run"))
data.design <- data.design %>% filter(!Sample_Name %in% c("17989C"), TERM_3 == "Sed")
group <- factor(data.design$Group)
Quality Control 1:
- Build edgeR object
- filtering
- normalization
- estimate dispersion
# build edgeR object
library(edgeR)
y <- DGEList(data.raw, group=group, genes=row.names(data.raw)) # must specify
options(digits=3)
y$samples
# before filtering and normalization
log.cpm <- cpm(y, log=TRUE, prior.count=2)
boxplot(log.cpm)

hist(apply(log.cpm, 2, median), xlab="median of log2(cpm)", main="")

# attach gene symbols to edgeR object
y<- symbols(y)
# The following package has already been detached within symbol function I wrote
#detach("package:org.Mm.eg.db", unload=TRUE) # conflict with select function, so to detach
#detach("package:AnnotationDbi", unload=TRUE)
head(y$genes)
## dropNAsymbols
y <- y[!is.na(y$genes$Symbol),]
# keep gene more than 1cpm in at least 2 samples
dim(y)
[1] 23103 46
keep <- rowSums(cpm(y) > 1) >= 2 ## filter
table(keep)
keep
FALSE TRUE
8774 14329
y <- y[keep, , keep.lib.sizes=FALSE]
# after filtering and before normalization
log.cpm <- cpm(y, log=TRUE, prior.count=2)
boxplot(log.cpm)

hist(apply(log.cpm, 2, median), xlab="median of log2(cpm)", main="")

# estimate dispersion
# design
design <- model.matrix(~0+group)
colnames(design) <- levels(group)
design
CD.2weeks.Sed.C CD.4weeks.Sed.C CD.6weeks.Sed.C CD.8weeks.Sed.C
1 0 0 0 0
2 0 0 0 0
3 0 0 0 0
4 0 0 0 0
5 0 0 0 0
6 0 0 0 0
7 0 0 0 0
8 0 0 0 0
9 0 0 0 0
10 0 0 0 0
11 0 0 0 0
12 0 0 0 0
13 0 0 0 0
14 0 0 0 0
15 0 0 0 0
16 0 0 0 0
17 0 0 0 0
18 0 0 0 0
19 1 0 0 0
20 1 0 0 0
21 1 0 0 0
22 1 0 0 0
23 0 1 0 0
24 0 1 0 0
25 0 1 0 0
26 0 1 0 0
27 0 1 0 0
28 0 0 1 0
29 0 0 1 0
30 0 0 1 0
31 0 0 1 0
32 0 0 0 1
33 0 0 0 1
34 0 0 0 0
35 0 0 0 0
36 0 0 0 1
37 0 0 0 1
38 0 0 0 0
39 0 0 0 0
40 0 0 0 0
41 0 0 0 0
42 1 0 0 0
43 0 1 0 0
44 0 0 1 0
45 0 0 1 0
46 0 0 0 1
WD.2weeks.Sed.C WD.4weeks.Sed.C WD.6weeks.Sed.C WD.8weeks.Sed.C
1 1 0 0 0
2 1 0 0 0
3 1 0 0 0
4 1 0 0 0
5 0 1 0 0
6 0 1 0 0
7 0 1 0 0
8 0 1 0 0
9 0 1 0 0
10 0 0 1 0
11 0 0 1 0
12 0 0 1 0
13 0 0 1 0
14 0 0 1 0
15 0 0 0 1
16 0 0 0 1
17 0 0 0 1
18 0 0 0 1
19 0 0 0 0
20 0 0 0 0
21 0 0 0 0
22 0 0 0 0
23 0 0 0 0
24 0 0 0 0
25 0 0 0 0
26 0 0 0 0
27 0 0 0 0
28 0 0 0 0
29 0 0 0 0
30 0 0 0 0
31 0 0 0 0
32 0 0 0 0
33 0 0 0 0
34 0 0 0 1
35 0 0 0 1
36 0 0 0 0
37 0 0 0 0
38 1 0 0 0
39 1 0 0 0
40 0 1 0 0
41 0 0 1 0
42 0 0 0 0
43 0 0 0 0
44 0 0 0 0
45 0 0 0 0
46 0 0 0 0
attr(,"assign")
[1] 1 1 1 1 1 1 1 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
# estimateDisp--------------------------------------------------------
y <- estimateDisp(y, design, robust=TRUE)
# plotBCV, width="3.8in", fig.cap="Scatterplot of the biological coefficient of variation (BCV) against the average abundance of each gene. The plot shows the square-root estimates of the common, trended and tagwise NB dispersions."----
plotBCV(y)

# glmQLFit------------------------------------------------------------
fit <- glmQLFit(y, design, robust=TRUE)
head(fit$coefficients)
CD.2weeks.Sed.C CD.4weeks.Sed.C CD.6weeks.Sed.C
ENSMUSG00000000001 -10.16 -10.24 -10.09
ENSMUSG00000000028 -13.57 -13.42 -13.71
ENSMUSG00000000037 -13.28 -13.75 -13.06
ENSMUSG00000000056 -9.98 -9.92 -9.99
ENSMUSG00000000058 -10.09 -10.22 -9.87
ENSMUSG00000000078 -9.78 -9.83 -9.68
CD.8weeks.Sed.C WD.2weeks.Sed.C WD.4weeks.Sed.C
ENSMUSG00000000001 -10.06 -10.17 -10.17
ENSMUSG00000000028 -13.40 -13.55 -13.48
ENSMUSG00000000037 -13.22 -13.28 -12.88
ENSMUSG00000000056 -9.85 -9.97 -10.05
ENSMUSG00000000058 -9.95 -10.01 -10.18
ENSMUSG00000000078 -9.73 -9.73 -9.82
WD.6weeks.Sed.C WD.8weeks.Sed.C
ENSMUSG00000000001 -10.05 -10.12
ENSMUSG00000000028 -13.58 -13.45
ENSMUSG00000000037 -13.35 -12.98
ENSMUSG00000000056 -9.95 -9.87
ENSMUSG00000000058 -9.85 -9.93
ENSMUSG00000000078 -9.61 -9.67
# QLDisp, out.width="3.8in", fig.cap="A plot of the quarter-root QL dispersion against the average abundance of each gene. Estimates are shown for the raw (before EB moderation), trended and squeezed (after EB moderation) dispersions. Note that the QL dispersions and trend shown here are relative to the NB dispersion trend shown in Figure~\ref{fig:plotBCV}."----
plotQLDisp(fit)

# df.prior------------------------------------------------------------
summary(fit$df.prior)
Min. 1st Qu. Median Mean 3rd Qu. Max.
3.57 3.57 3.64 3.64 3.70 3.70
Quality Control 2:
- pearson correlation among samples
- MDS plot
- Principle Component Analysis
# pearson correlation
cpm <- cpm(y, normalized.lib.size=T)
cpm.cor <- cor(cpm, method = "pearson")
hist(cpm.cor)

library(corrplot)
corrplot(cpm.cor, method="square", order="hclust", cl.lim = c(0.85, 1), tl.col="purple", tl.cex = 0.75, type = "full", is.corr = FALSE)
colorlegend(colbar = grey(1:100 / 100), 1:10, col = "red", align = "l",
xlim = c(0, 6), ylim = c(-0.5,-0.1), vertical = FALSE)

# MDS plot
# no label version
par(xpd = T, mar = par()$mar + c(0,0,0,7)) # to make legends outside of the plot
colors <- rep(c("darkgreen", "red", "blue", "purple"), 2)
pch <- c(0,1,2,5,15,16,17,18)
plotMDS(y, top = 500, cex = 1, pch=pch[group], dim.plot = c(1,2), ndim = 2, gene.selection = "pairwise", col=colors[group]) # col = as.numeric(group) differentiate colors between groups
legend(1.2, 0.7,levels(group), pch=pch, col=colors)

par(mar=c(5, 4, 4, 2.3) + 0.1)
# with label version, to identify outliers
par(xpd = T, mar = par()$mar + c(0,0,0,7)) # to make legends outside of the plot
colors <- rep(c("darkgreen", "red", "blue", "purple"), 2)
# pch <- c(0,1,2,5,15,16,17,18)
plotMDS(y, top = 500, cex = 1, dim.plot = c(1,2), ndim = 2, gene.selection = "pairwise", col=colors[group]) # col = as.numeric(group) differentiate colors between groups
legend(1.2, 0.7,levels(group), pch=pch, col=colors)

par(mar=c(5, 4, 4, 2.3) + 0.1)
## ---- Pincicpal component analysis ---
logCPM.PCA<-log.cpm
rownames(logCPM.PCA) <- y$genes$Symbol
#colnames(logCPM) <- paste(y$samples$group, 1:2, sep="-")
colnames(logCPM.PCA) <- data.design$sample_short # get it into the y project
pca_original = prcomp(t(logCPM.PCA),scale=T, center=T)
pca_x <- pca_original$x
pca_table <- data.frame(pca_x, data.design)
x <- pca_original$sdev^2/sum(pca_original$sdev^2) # Proportion of Variance Explained for all components
## Scree plot
plot(x, xlab="Principal Component", ylab="Proportion of Variance Explained", type="b")

plot(cumsum(x), xlab="Principal Component", ylab="Cumulative Proportion of Variance Explained", type="b")

## PCA plot
library(ggplot2)
PCA_plot <- function(pca_table, PC_x, PC_y, color, shape){
#PC_x,PC_y are type of interger
#color, shape, are type of string
g <- ggplot(pca_table, aes_string(x=names(pca_table[PC_x]), y=names(pca_table[PC_y]), color=color, shape=shape))
g <- g + geom_point(alpha=0.7, size=3)
# g <- g + labs(color = "Group", shape="Tissue")
g + labs(x = paste(names(pca_table[PC_x]), scales::percent(x[PC_x]),"variance explained", sep=" "), y=paste(names(pca_table[PC_y]), scales::percent(x[PC_y]),"variance explained", sep=" "))
#filename <- paste()
#ggsave(filename, width=7, height=7, units="in")
}
PCA_plot(pca_table, 1, 2, "TERM_1", "TERM_2")

PCA_plot(pca_table, 2, 3, "TERM_1", "TERM_2")

Data Manipulation for GLM
# transpose cpm, observation (row) is sample, variable (column) is each gene.
cpm <- cpm(y, normalized.lib.size=T)
cpm.t<-t(cpm)
colnames(cpm)
[1] "WD.2weeks.Sed.C_17963C" "WD.2weeks.Sed.C_17964C"
[3] "WD.2weeks.Sed.C_17966C" "WD.2weeks.Sed.C_17967C"
[5] "WD.4weeks.Sed.C_17969C" "WD.4weeks.Sed.C_17970C"
[7] "WD.4weeks.Sed.C_17971C" "WD.4weeks.Sed.C_17972C"
[9] "WD.4weeks.Sed.C_17973C" "WD.6weeks.Sed.C_17974C"
[11] "WD.6weeks.Sed.C_17976C" "WD.6weeks.Sed.C_17977C"
[13] "WD.6weeks.Sed.C_17978C" "WD.6weeks.Sed.C_17979C"
[15] "WD.8weeks.Sed.C_17980C" "WD.8weeks.Sed.C_17981C"
[17] "WD.8weeks.Sed.C_17982C" "WD.8weeks.Sed.C_17985C"
[19] "CD.2weeks.Sed.C_17986C" "CD.2weeks.Sed.C_17987C"
[21] "CD.2weeks.Sed.C_17990C" "CD.2weeks.Sed.C_17991C"
[23] "CD.4weeks.Sed.C_17992C" "CD.4weeks.Sed.C_17993C"
[25] "CD.4weeks.Sed.C_17995C" "CD.4weeks.Sed.C_17996C"
[27] "CD.4weeks.Sed.C_17997C" "CD.6weeks.Sed.C_17999C"
[29] "CD.6weeks.Sed.C_18000C" "CD.6weeks.Sed.C_18602C"
[31] "CD.6weeks.Sed.C_18603C" "CD.8weeks.Sed.C_18605C"
[33] "CD.8weeks.Sed.C_18606C" "WD.8weeks.Sed.C_18624C"
[35] "WD.8weeks.Sed.C_18625C" "CD.8weeks.Sed.C_18626C"
[37] "CD.8weeks.Sed.C_18627C" "WD.2weeks.Sed.C_18673C"
[39] "WD.2weeks.Sed.C_18674C" "WD.4weeks.Sed.C_18675C"
[41] "WD.6weeks.Sed.C_18676C" "CD.2weeks.Sed.C_18677C"
[43] "CD.4weeks.Sed.C_18678C" "CD.6weeks.Sed.C_18679C"
[45] "CD.6weeks.Sed.C_18680C" "CD.8weeks.Sed.C_18681C"
all(data.design$sample_short == rownames(cpm.t))
[1] TRUE
# add design file to transposed cpm, data manipulation using filter based on design file TERMs (TERM_1 - TERM_4)
cpm.t.meta <- cbind(cpm.t, data.design)
# select all the WD and Sed group
cpm.WD.t <-cpm.t.meta %>% filter(TERM_1 == "WD", TERM_3 == "Sed")
# remove metadata and transpose table back, rows = genes, columns = samples
gene_number <- dim(cpm)[1]
gene_number
[1] 14329
cpm.WD <- cpm.WD.t[, c(1: gene_number)] %>% t()
# check whether metadata (which contains characters has been removed, values should all be doulbe type now)
all(map_lgl(cpm.WD, is.double))
[1] TRUE
# assign sample_name_short it to the colnames after transposition.
colnames(cpm.WD) <- as.character(cpm.WD.t$sample_short)
# select all the CD and Sed group, and calculate the mean of each group based on weeks
cpm.ave.CD.meta <- cpm.t.meta %>% filter(TERM_1 == "CD") %>% group_by(TERM_2) %>% summarise_all(mean)
# check the column boundray of cpm
cpm.ave.CD.meta[,1:2]
cpm.ave.CD.meta[,(gene_number+1):(gene_number+2)]
# remove meta column and transform
cpm.ave.CD <- cpm.ave.CD.meta[, 2:(gene_number+1)] %>% t()
colnames(cpm.ave.CD) <- cpm.ave.CD.meta[,1] %>% unlist() %>% paste("CD.ave.", ., sep="")
all(map_lgl(cpm.ave.CD, is.double))
[1] TRUE
# The following is to subtract the mean of CD from WD of each week, and glm model the difference between WD and CD group
dim(cpm.WD)
[1] 14329 24
dim(cpm.ave.CD)
[1] 14329 4
cpm.clean <- data.frame(cpm.WD, cpm.ave.CD) ## column 1:14395 are WD count read, 14396:14399 are CD count read.
dim(cpm.clean)
[1] 14329 28
head(cpm.clean)
# WD-CD.ave, result will be combined into D_all value
D_2wk <- cpm.clean %>% select(contains("2weeks")) %>% mutate_all(funs(Dif=.- CD.ave.2weeks)) %>% select(contains("C_Dif"))
D_4wk <- cpm.clean %>% select(contains("4weeks")) %>% mutate_all(funs(Dif=.- CD.ave.4weeks)) %>% select(contains("C_Dif"))
D_6wk <- cpm.clean %>% select(contains("6weeks")) %>% mutate_all(funs(Dif=.- CD.ave.6weeks)) %>% select(contains("C_Dif"))
D_8wk <- cpm.clean %>% select(contains("8weeks")) %>% mutate_all(funs(Dif=.- CD.ave.8weeks)) %>% select(contains("C_Dif"))
D_all <- data.frame(D_2wk, D_4wk, D_6wk, D_8wk)
rownames(D_all) <- rownames(cpm.ave.CD)
head(D_all)
Generalized linear mGLM
z ~ u + duration, z= y(WD)-y(CD)ave
- Model the difference of WD and CD by duration on diet
- The difference of WD and CD is calculated by subtracting the average of CD from each WD
- duration: 2wks, 4wks, 6wks, 8wks
## ---- GLM, z ~ u + duration, z= y(WD)-y(CD)
# use durataion as predictor, duration matches the column of D_all
duration <- c(rep("2weeks", 6), rep("4weeks", 6), rep("6weeks", 6), rep("8weeks", 6))
# for each gene, generate GLM with duration as prediction, collect the summary of the model for each gene
glm.fit <- D_all %>% t() %>% as.data.frame() %>%
map(~ lm(. ~ duration)) %>%
map(summary)
colnames(D_all)
[1] "WD.2weeks.Sed.C_17963C_Dif" "WD.2weeks.Sed.C_17964C_Dif"
[3] "WD.2weeks.Sed.C_17966C_Dif" "WD.2weeks.Sed.C_17967C_Dif"
[5] "WD.2weeks.Sed.C_18673C_Dif" "WD.2weeks.Sed.C_18674C_Dif"
[7] "WD.4weeks.Sed.C_17969C_Dif" "WD.4weeks.Sed.C_17970C_Dif"
[9] "WD.4weeks.Sed.C_17971C_Dif" "WD.4weeks.Sed.C_17972C_Dif"
[11] "WD.4weeks.Sed.C_17973C_Dif" "WD.4weeks.Sed.C_18675C_Dif"
[13] "WD.6weeks.Sed.C_17974C_Dif" "WD.6weeks.Sed.C_17976C_Dif"
[15] "WD.6weeks.Sed.C_17977C_Dif" "WD.6weeks.Sed.C_17978C_Dif"
[17] "WD.6weeks.Sed.C_17979C_Dif" "WD.6weeks.Sed.C_18676C_Dif"
[19] "WD.8weeks.Sed.C_17980C_Dif" "WD.8weeks.Sed.C_17981C_Dif"
[21] "WD.8weeks.Sed.C_17982C_Dif" "WD.8weeks.Sed.C_17985C_Dif"
[23] "WD.8weeks.Sed.C_18624C_Dif" "WD.8weeks.Sed.C_18625C_Dif"
glm.fit %>% names() %>% head()
[1] "ENSMUSG00000000001" "ENSMUSG00000000028" "ENSMUSG00000000037"
[4] "ENSMUSG00000000056" "ENSMUSG00000000058" "ENSMUSG00000000078"
# look at two examples "Apoe", "Pecam1" in glm model
y$genes[y$genes$Symbol=="Apoe",]
glm.fit[["ENSMUSG00000002985"]]
Call:
lm(formula = . ~ duration)
Residuals:
Min 1Q Median 3Q Max
-139.56 -84.47 7.05 49.36 284.98
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -18.7 45.4 -0.41 0.68
duration4weeks 34.1 64.1 0.53 0.60
duration6weeks 62.7 64.1 0.98 0.34
duration8weeks 66.7 64.1 1.04 0.31
Residual standard error: 111 on 20 degrees of freedom
Multiple R-squared: 0.0649, Adjusted R-squared: -0.0753
F-statistic: 0.463 on 3 and 20 DF, p-value: 0.711
y$genes[y$genes$Symbol=="Pecam1",]
glm.fit[["ENSMUSG00000020717"]]
Call:
lm(formula = . ~ duration)
Residuals:
Min 1Q Median 3Q Max
-5.461 -1.967 -0.402 2.049 6.151
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.2814 1.3058 0.98 0.34
duration4weeks 0.0896 1.8466 0.05 0.96
duration6weeks -0.3399 1.8466 -0.18 0.86
duration8weeks 0.0125 1.8466 0.01 0.99
Residual standard error: 3.2 on 20 degrees of freedom
Multiple R-squared: 0.0032, Adjusted R-squared: -0.146
F-statistic: 0.0214 on 3 and 20 DF, p-value: 0.996
Obtain the statistics from GLM
# Use home-made pipeline stat_table_all to:
source("~/Documents/code/01_function/my_GLM.R")
# 1. retrieve the R-square(r2) and the corrected p value (q) from the fit summary of each gene
# 2. filter genes based on qval<0.05, r2>0.5, arrange based on q (ascending).
# 3. from filtered genes, retrieve coefficient of estimate and p val for each gene for down stream analysis.
stat_table_all <- stat_table(glm.fit)
dim(stat_table_all)
[1] 906 12
head(stat_table_all)
# add the gene names to stat_table_all
stat_table_all<-left_join(stat_table_all, y$genes, by=c("ENSMUSG_ID"="genes"))
dim(stat_table_all)
[1] 906 15
head(stat_table_all)
# export the gene list table for Kegg and GO search
# write.table(stat_table_all, paste(out_analysis, out_prefix, "_gene_list.txt", sep=""), sep="\t", row=F, quote=F)
# export the background gene list for Kegg and GO search, subtract genes of stat_table_all from y$genes
bg_gene_list <- y$genes
dim(bg_gene_list)
[1] 14329 4
#write.table(bg_gene_list, paste(out_analysis, out_prefix, "_bg_gene.txt", sep=""), sep="\t", row=F, quote=F)
Explore the GLM model
- PCA of the coefficient matrix (row: genes; colums: coefficients)
- heatmap of the coefficient matrix
- line plot of all the coefficients based on each gene
- VennDiagram plotting of genes and overlap of genes that are significant changed under each coefficient catagory
- Tile plot showing the pattern of coeffiecint
# principal component analysis of estimate of the coefficient
stat_est <- stat_table_all %>% select(contains("est"))
head(stat_est)
pca_original = prcomp(t(stat_est),scale=T, center=T)
pca_x <- data.frame(pca_original$x, estimate = rownames(pca_original$x))
pca_table <- pca_x
x <- pca_original$sdev^2/sum(pca_original$sdev^2) # Proportion of Variance Explained for all components
# Scree plot
plot(x, xlab="Principal Component", ylab="Proportion of Variance Explained", type="b")

plot(cumsum(x), xlab="Principal Component", ylab="Cumulative Proportion of Variance Explained", type="b")

# PCA plot
g <- ggplot(pca_table, aes(x=pca_table[1], y=pca_table[2], color= estimate))
g <- g + geom_point(alpha=0.7, size=3)
# g <- g + labs(color = "Group", shape="Tissue")
g + labs(x = paste(names(pca_table[1]), scales::percent(x[1]),"variance explained", sep=" "), y=paste(names(pca_table[2]), scales::percent(x[2]),"variance explained", sep=" "))

# hirachical analysis of estimate of the coefficient
stat_est_h<- t(scale(t(stat_est))) %>% as.data.frame() %>% as.matrix()
library(gplots)
col.pan <- colorpanel(100, "blue", "white", "red")
heatmap.2(stat_est_h, col=col.pan, Rowv=TRUE, scale="none",
trace="none", dendrogram="both", cexRow=0.5, cexCol=0.7, density.info="none",
margin=c(9,4), lhei=c(2,10), lwid=c(2,6))

# plot the estimate of coefficient of each gene
stat_table_est.gather <- stat_table_all %>%
select("ENSMUSG_ID", contains("_est")) %>%
gather(key=coeff_group, value=estimate, -ENSMUSG_ID)
str(stat_table_est.gather)
'data.frame': 3624 obs. of 3 variables:
$ ENSMUSG_ID : chr "ENSMUSG00000020178" "ENSMUSG00000038805" "ENSMUSG00000068696" "ENSMUSG00000071234" ...
$ coeff_group: chr "X.Intercept._est" "X.Intercept._est" "X.Intercept._est" "X.Intercept._est" ...
$ estimate : num 0.373 0.385 7.494 0.56 0.225 ...
stat_table_est.gather$coeff_group <- factor(stat_table_est.gather$coeff_group,
levels =c("X.Intercept._est", "duration4weeks_est", "duration6weeks_est","duration8weeks_est"),
ordered = TRUE)
ggplot(stat_table_est.gather, aes(x=coeff_group,y=estimate, group=ENSMUSG_ID)) +
geom_line(alpha=0.1, color="red")

ggplot(stat_table_est.gather, aes(x=coeff_group,y=estimate, group=ENSMUSG_ID))+
geom_line(alpha=0.1, color="red") + scale_y_continuous(limits= c(-50, 75))

# To identify each gene’s critical predictor. binarized the estimate and significance matrices by hard cutoffs.
stat_pval <- stat_table_all %>% select(contains("_pval"))
head(stat_pval)
head(stat_est)
stat_binary <- (apply(stat_pval, 2, function(x) x<0.05) & apply(stat_est, 2, function(x) abs(x)>0.2)) %>% as.data.frame()
rowSums(stat_binary) %>% table()
.
0 1 2 3 4
1 435 334 96 40
# summary of the numbers of genes passes the filter
apply(stat_binary, 2, table)
X.Intercept._pval duration4weeks_pval duration6weeks_pval
FALSE 731 373 295
TRUE 175 533 611
duration8weeks_pval
FALSE 674
TRUE 232
# sepertate out genes by their coefficients
col <- as.list(names(stat_binary))
genes_by_coeff <- map(col, ~ stat_table_all[["Symbol"]][stat_binary[[.]]])
names(genes_by_coeff) <- c("Intercept", "duration4wks", "duration6wks", "duration8wks")
glimpse(genes_by_coeff)
List of 4
$ Intercept : chr [1:175] "Six3" "Gpr88" "Drd2" "Gpr6" ...
$ duration4wks: chr [1:533] "Gpr88" "Syndig1l" "Tacr1" "Ptpn9" ...
$ duration6wks: chr [1:611] "Adora2a" "Six3" "Gpr88" "Syndig1l" ...
$ duration8wks: chr [1:232] "Gpr88" "Ido1" "Adck2" "Fos" ...
# to check the overlap of genes in different coefficient catagories
library(VennDiagram)
grid.newpage()
within(genes_by_coeff,
draw.quad.venn(area1 = length(Intercept),
area2 = length(duration4wks),
area3 = length(duration6wks),
area4 = length(duration8wks),
n12 = length(intersect(Intercept, duration4wks)),
n13 = length(intersect(Intercept, duration6wks)),
n14 = length(intersect(Intercept, duration8wks)),
n23 = length(intersect(duration4wks, duration6wks)),
n24 = length(intersect(duration4wks, duration8wks)),
n34 = length(intersect(duration6wks, duration8wks)),
n123 = Intercept %>% intersect(duration4wks) %>% intersect(duration6wks) %>% length(),
n124 = Intercept %>% intersect(duration4wks) %>% intersect(duration8wks) %>% length(),
n134 = Intercept %>% intersect(duration6wks) %>% intersect(duration8wks) %>% length(),
n234 = duration4wks %>% intersect(duration6wks) %>% intersect(duration8wks) %>% length(),
n1234 = Intercept %>% intersect(duration4wks) %>% intersect(duration6wks) %>% intersect(duration8wks) %>% length(),
category = names(genes_by_coeff),
fill = c("orange", "red", "green", "blue"))
)

$Intercept
[1] "Six3" "Gpr88" "Drd2" "Gpr6"
[5] "Slc35d3" "Klhl13" "Ldlr" "Nexn"
[9] "4930432K21Rik" "Rasgrp2" "Tmem200a" "Hrh1"
[13] "Shq1" "Scn5a" "Ofd1" "Ybey"
[17] "3632451O06Rik" "Fam177a" "Lpcat2" "Gpatch11"
[21] "Fam111a" "Mtrf1" "Nectin4" "Phf7"
[25] "Lrrc49" "Diexf" "Micall2" "Arhgap22"
[29] "Insig1" "Mme" "Arl5c" "C77080"
[33] "Bcl6b" "Ppfibp2" "Zfp804a" "Nr1d1"
[37] "Ptcd1" "Anapc4" "Ddx27" "Slc22a3"
[41] "Dtwd2" "Pik3c3" "Vipr1" "Gm5083"
[45] "Rars2" "Tcerg1l" "E130102H24Rik" "Zfp930"
[49] "Ankra2" "Nanos2" "Gtf2h1" "B3gnt9"
[53] "Ttc34" "Tada1" "Ccdc183" "Pex13"
[57] "Samd3" "Fam53b" "Tigd2" "Herpud2"
[61] "Rasd1" "Trim17" "Rbm43" "Pcsk9"
[65] "Mta2" "Dbr1" "Dhx32" "Rac2"
[69] "4930513N10Rik" "Gpt" "Cog6" "Pum3"
[73] "Runx2" "Nrap" "Nub1" "Gart"
[77] "Wrb" "Sdhaf3" "Zfp791" "Zwilch"
[81] "Mustn1" "Ubxn4" "Bbs9" "Smad7"
[85] "Zkscan6" "Oxnad1" "Glyctk" "Rassf6"
[89] "Arhgap27" "Nat8f5" "Dusp7" "St7l"
[93] "Pced1b" "Cdkal1" "2700046G09Rik" "Ufd1"
[97] "Il21r" "Card19" "Eif4e2" "Gemin8"
[101] "Gm11696" "6430550D23Rik" "Fra10ac1" "Pdhx"
[105] "Cbx8" "Iqcc" "Kcna5" "Znrf1"
[109] "P4ha3" "Hspa9" "Rabgef1" "Slc52a3"
[113] "Zdhhc15" "2310057M21Rik" "Cys1" "Hvcn1"
[117] "Cyb5d2" "Iba57" "Arntl" "Tpm1"
[121] "Top1" "Otx1" "Trim26" "Agk"
[125] "Meltf" "Sh3yl1" "Rerg" "Zbed4"
[129] "Rasip1" "Kcne1l" "Lfng" "Met"
[133] "Hpdl" "Klhdc1" "Mettl25" "Tecpr1"
[137] "Snx2" "Ccdc181" "Ccl6" "Cmtm3"
[141] "Thtpa" "Adprm" "BC030500" "Zc3h8"
[145] "Sox18" "Slc9a9" "Mob3c" "Atpaf1"
[149] "Slc35g2" "Wdr92" "Cbwd1" "Hic2"
[153] "Isg15" "6330409D20Rik" "Ucp3" "Mettl2"
[157] "Atg14" "C530005A16Rik" "1810010H24Rik" "Sema5b"
[161] "Aggf1" "Cutc" "LOC100861615" "Ephb3"
[165] "Ubc" "Araf" "Plrg1" "Aspa"
[169] "Zfp7" "Zfp202" "Cluap1" "Inip"
[173] "Lhx2" "Tgfa" "Ttc26"
$duration4wks
[1] "Gpr88" "Syndig1l" "Tacr1" "Ptpn9"
[5] "Adck2" "Inhba" "Rxrg" "Trib2"
[9] "Banp" "Oprk1" "Fos" "Sh2d3c"
[13] "Ldlr" "4930432K21Rik" "Nuak1" "Spred2"
[17] "Gfod1" "Smad3" "Cln6" "Cwc25"
[21] "Cdyl2" "Naglu" "Lrrc61" "Tmem200a"
[25] "Casp7" "Dll4" "Rilpl1" "Dusp4"
[29] "Junb" "Mccc2" "Schip1" "Dnajc21"
[33] "Ier2" "Spata5" "Hrh1" "Shq1"
[37] "Erf" "Rrad" "Scn5a" "Ofd1"
[41] "Cbfa2t3" "Ybey" "St8sia5" "Heyl"
[45] "Mical2" "Ccdc112" "Mamld1" "3632451O06Rik"
[49] "Sgsm1" "Myadml2" "Zfp459" "Fam177a"
[53] "Fmnl1" "Tdp1" "Pfkfb3" "Bhlhe40"
[57] "Hyi" "Zfand3" "Sco1" "Cc2d1b"
[61] "Lrfn2" "Lig4" "Lpcat2" "Mfap3"
[65] "Pdp1" "Cd180" "Cnot4" "Gpatch11"
[69] "Fam111a" "Slc10a7" "Plcl2" "S1pr2"
[73] "Smg9" "Bdnf" "B4galt4" "Mtrf1"
[77] "Heatr3" "Trpm4" "Mark3" "Nfatc2"
[81] "Ngfr" "Nectin4" "Htr1b" "Phf7"
[85] "Lrrc49" "Crem" "Ipmk" "Zfyve1"
[89] "Glrx2" "4930453N24Rik" "Ccnf" "Trim68"
[93] "Micall2" "Arhgap22" "Blzf1" "Insig1"
[97] "Dusp16" "Arl5c" "Zfand4" "Tefm"
[101] "Jun" "Bcl6b" "Slc25a35" "Gtf2ird1"
[105] "Fam212b" "Mex3d" "A930017K11Rik" "Ccdc6"
[109] "Plekha3" "Ppfibp2" "Phf13" "Grip1"
[113] "Ilvbl" "Armc5" "E2f6" "Fam222a"
[117] "Synpo" "Htr6" "Ciart" "Ptcd1"
[121] "Frmd3" "Anapc4" "Zbtb12" "Lhfp"
[125] "Actr5" "Zmynd8" "Slc22a3" "Hunk"
[129] "Lmcd1" "Gpr83" "Zscan22" "Pms1"
[133] "Dusp8" "Chaf1a" "Hrh2" "Plcd1"
[137] "Mmp24" "Mycn" "Gpr1" "Pik3c3"
[141] "Thap1" "Gm20878" "Ankrd33b" "Snap29"
[145] "Layn" "Gm5083" "Ddx50" "Pla2g4e"
[149] "Zfp366" "Prkag2" "Abcb1b" "Tcerg1l"
[153] "Zfp930" "Ppm1d" "Nanos2" "Rassf1"
[157] "Gtf2h1" "Tfdp1" "Pou6f2" "Cacng4"
[161] "Cables1" "Gpr68" "Uhrf2" "B3gnt9"
[165] "A430108G06Rik" "Map3k21" "LOC100862446" "Gpr137b"
[169] "Homer1" "Tada1" "Fam117a" "Rasl11b"
[173] "Cbln2" "Ccdc183" "Hist1h2ao" "Narf"
[177] "P4ha1" "Cnep1r1" "Pex13" "Abhd6"
[181] "Ubox5" "Irs2" "4931440F15Rik" "Nck1"
[185] "Kbtbd2" "Bend3" "2410002F23Rik" "Tigd2"
[189] "Mapk14" "Zbtb40" "Camk1g" "Rhobtb2"
[193] "Lsm11" "Adam19" "Ephb2" "St7"
[197] "Rhob" "1700029J07Rik" "Rnd3" "Tgfb3"
[201] "Lrrc29" "Pop1" "Herpud2" "Arhgef7"
[205] "Rasd1" "Rtel1" "Prdm4" "Mettl3"
[209] "Kcnf1" "Wisp1" "Tiparp" "Lss"
[213] "Dvl2" "Rbm43" "Wdcp" "Cd300c2"
[217] "Jade2" "Prickle1" "Dhx32" "Abcc10"
[221] "Slc35f3" "4930513N10Rik" "Dusp14" "Sgms1"
[225] "Pafah2" "Plekho2" "Serinc2" "Cog6"
[229] "Pum3" "Sema4c" "Chadl" "Hmox1"
[233] "Brix1" "Apold1" "Dars" "Sf3a3"
[237] "Cpne2" "Fbn1" "Usp18" "Scpep1"
[241] "Dnttip2" "Cd302" "Adamts1" "Zdhhc5"
[245] "Foxred1" "Ubxn10" "Xbp1" "Cited2"
[249] "Ltv1" "Bcor" "Fam171a1" "Nrap"
[253] "Nub1" "Rad9b" "Gart" "Zfp438"
[257] "2210408I21Rik" "Wrb" "Nr4a1" "Zfp180"
[261] "Mtmr4" "Ppp2r5a" "Nop2" "Sdhaf3"
[265] "Sh2d5" "Galns" "Zfp791" "Arhgef3"
[269] "Hps1" "Calhm2" "D630045J12Rik" "Zwilch"
[273] "Mustn1" "Zswim4" "Hspa12b" "Egr2"
[277] "Rfwd2" "Zfp316" "Slfn2" "Rnf4"
[281] "Ubxn4" "Usp43" "Hexim2" "Ppp1r13l"
[285] "Erp44" "Man2b2" "Ccl9" "Smad7"
[289] "Vegfc" "Ric8b" "Cfap70" "Tmem220"
[293] "Aagab" "F2rl2" "Exosc3" "Gspt2"
[297] "Sh3rf3" "Etv5" "Xylb" "Ldlrad3"
[301] "Glyctk" "Srp68" "Snhg5" "Actr8"
[305] "Alox12b" "Arhgap27" "Nat8f5" "Nol10"
[309] "Crnkl1" "Smu1" "Nkrf" "Nxpe4"
[313] "Tmem100" "St7l" "Pgm1" "Mak16"
[317] "Atp8b2" "Cep131" "Wdsub1" "Nacad"
[321] "Sord" "Golt1b" "Pced1b" "Tmem121b"
[325] "Cdkal1" "Zfp790" "Tiam2" "Adcyap1"
[329] "Etv6" "Creb5" "Fstl3" "2700046G09Rik"
[333] "Ufd1" "Adra1d" "Hnrnpf" "Naa35"
[337] "Smg8" "Naa25" "Rundc1" "Ndufaf4"
[341] "Ints5" "Tlr9" "Pdyn" "Tox4"
[345] "Card19" "Eif4e2" "Srpk3" "Ythdf2"
[349] "Gldn" "Mycl" "6430550D23Rik" "Ido2"
[353] "Mybpc1" "Rgs11" "Irak1" "Rrs1"
[357] "Iqgap2" "Il17ra" "E2f4" "Pdhx"
[361] "Cbx8" "Cyr61" "Zcwpw1" "Atp8b1"
[365] "Kcna5" "Mus81" "Plpbp" "Fam46a"
[369] "Olig2" "Slc2a9" "P4ha3" "Hspa9"
[373] "Rabgef1" "Ggnbp2" "Slc52a3" "Zdhhc15"
[377] "2310057M21Rik" "Stx3" "Tspyl2" "Asb4"
[381] "Cys1" "Hvcn1" "Top3a" "Smad2"
[385] "Rhbdd1" "Cercam" "Zfp687" "Axin1"
[389] "Uck2" "Krcc1" "Ddx10" "Idua"
[393] "Trim16" "Mis12" "Lrrtm2" "Iba57"
[397] "Arntl" "Fancc" "Slc9a8" "Ldlrap1"
[401] "Neurl2" "Tpm1" "Egr3" "Pdzrn4"
[405] "Rtn4rl2" "Top1" "Otx1" "Rassf5"
[409] "Agk" "Rad17" "Meltf" "Sh3yl1"
[413] "1700034H15Rik" "Rin1" "Surf6" "Srarp"
[417] "Kcnip3" "Rasip1" "Lfng" "Loxl3"
[421] "Ddr1" "Zufsp" "Plaa" "Dhx16"
[425] "Met" "Fam84a" "Inafm2" "Jdp2"
[429] "Ercc8" "Hlcs" "Sgsh" "4732491K20Rik"
[433] "Nfe2l3" "Mpp3" "Eed" "Mettl25"
[437] "Oas1c" "Wdr91" "Tecpr1" "Mphosph10"
[441] "Mthfr" "Zfp874b" "Snx2" "Tob2"
[445] "Ulk3" "Med26" "Gtpbp4" "Gramd1b"
[449] "Zfp90" "Akap7" "Dhx8" "Sema6c"
[453] "Serinc3" "Ccl6" "Prpf3" "Fkbp5"
[457] "Thtpa" "Cdyl" "Trim9" "Fam110b"
[461] "BC030500" "Ints8" "Mob3c" "Lrrc57"
[465] "Atpaf1" "Slc35g2" "B230206H07Rik" "Klhl8"
[469] "Wdr92" "Cbwd1" "Rpusd2" "Zranb1"
[473] "Isg15" "Unc45a" "Epha2" "Irf8"
[477] "Moap1" "6330409D20Rik" "Tmem109" "Tbx2"
[481] "Ucp3" "Clec4a2" "Cdkn1a" "B4galt5"
[485] "Atg14" "Fam155a" "Kansl2" "1810010H24Rik"
[489] "Rars" "D17H6S53E" "Cyb5rl" "Sema5b"
[493] "Mrpl50" "Arf6" "Aggf1" "Gimap1"
[497] "Mboat1" "Cnot9" "Rrm2" "Gdpd5"
[501] "BC034090" "Rbm19" "Srfbp1" "Sowahb"
[505] "Araf" "Plrg1" "Mcam" "Aspa"
[509] "Zfp7" "Synj2" "Pced1a" "Sec23ip"
[513] "Cluap1" "Zbtb17" "Scg2" "Gm960"
[517] "Gm11549" "Fndc8" "Inip" "Arsg"
[521] "Chsy1" "Tbcd" "Tmem8" "Gimap6"
[525] "Tmem200b" "Spidr" "Tgfa" "Ttc26"
[529] "Cactin" "Nup43" "Pcsk1" "Rnpepl1"
[533] "Car14"
$duration6wks
[1] "Adora2a" "Six3" "Gpr88" "Syndig1l"
[5] "Cd4" "Chat" "Isl1" "Penk"
[9] "Trh" "Drd2" "Gpr6" "Sh3rf2"
[13] "Rgs9" "Tacr1" "Htr1d" "Slc35d3"
[17] "Lrrc10b" "Drd1" "Slc5a7" "Slc10a4"
[21] "Ido1" "Ptpn9" "Ppp1r1b" "Klhl13"
[25] "Adck2" "Inhba" "Pde1b" "Rxrg"
[29] "Trib2" "Banp" "Oprk1" "Fos"
[33] "Gng7" "Sh2d3c" "Rarb" "Ldlr"
[37] "Nexn" "Serpina9" "Pde10a" "Six3os1"
[41] "4930432K21Rik" "Nuak1" "Spred2" "Dach1"
[45] "Gfod1" "Numb" "Rasgrp2" "Smad3"
[49] "Adcy5" "Cln6" "Gnb4" "Cwc25"
[53] "Cdyl2" "Asic4" "Naglu" "Lrrc61"
[57] "Ecel1" "Per2" "Casp7" "Dll4"
[61] "Rilpl1" "Dusp4" "Junb" "Mccc2"
[65] "Schip1" "Dnajc21" "Ier2" "Sik2"
[69] "Spata5" "Hrh1" "Shq1" "Slco5a1"
[73] "Scn5a" "Ofd1" "Cbfa2t3" "Rtn4ip1"
[77] "Ybey" "St8sia5" "Heyl" "Mical2"
[81] "Ccdc112" "Mamld1" "3632451O06Rik" "Prag1"
[85] "Baz1a" "Gpr149" "Sgsm1" "Rbm11"
[89] "Myadml2" "B3galt6" "Zfp459" "Fam177a"
[93] "Fmnl1" "Tdp1" "Pfkfb3" "Bhlhe40"
[97] "Igf2bp2" "Osbpl3" "Zfand3" "Sco1"
[101] "Ttc22" "Slc37a1" "Cc2d1b" "Lrfn2"
[105] "Lig4" "Mfap3" "Pdp1" "Cd180"
[109] "Ptgs2" "Cnot4" "Gpatch11" "Bace2"
[113] "Slc10a7" "Plcl2" "Ninl" "Irf2"
[117] "Bdnf" "B4galt4" "Mtrf1" "Trim45"
[121] "Trpm4" "Mark3" "Nfatc2" "Ngfr"
[125] "Nectin4" "Lrrc49" "4931415C17Rik" "Crem"
[129] "Hspbap1" "Csrnp2" "Itga9" "Ipmk"
[133] "Zfyve1" "Diexf" "Dlk1" "Magel2"
[137] "Glrx2" "Tipin" "Phf21b" "Ccnf"
[141] "Trim68" "Pim1" "Arhgap39" "Gramd4"
[145] "Mme" "Dusp16" "Arl5c" "Zfand4"
[149] "Zc3h6" "Tefm" "C77080" "Jun"
[153] "Slc25a35" "Fbxl4" "Oacyl" "Fam212b"
[157] "Dnajc1" "Ccdc6" "Plekha3" "Plppr1"
[161] "Pxdn" "Phf13" "Grip1" "Zfp804a"
[165] "B3gat2" "2810029C07Rik" "Nr1d1" "Ilvbl"
[169] "Armc5" "E2f6" "Dgkk" "Fam222a"
[173] "P4ha2" "Synpo" "Ptcd1" "Frmd3"
[177] "Ahdc1" "Parn" "Mcf2l" "Kdm4d"
[181] "Lhfp" "Ddx27" "Zmynd8" "Gpr21"
[185] "Slc22a3" "Ehhadh" "Hunk" "Zfp941"
[189] "Gpr83" "Pms1" "Dtwd2" "Myo5b"
[193] "Prr5" "Chaf1a" "Slc35f4" "Plin4"
[197] "Mmp24" "Lzts1" "Gpr1" "Pik3c3"
[201] "Gm20878" "Ankrd33b" "Snap29" "Rnf165"
[205] "Rin3" "Vipr1" "Gm5083" "Ddx50"
[209] "Pla2g4e" "Zfp366" "Prkag2" "Abcb1b"
[213] "Sp9" "Lrrc56" "Rars2" "Ccdc103"
[217] "Tcerg1l" "E130102H24Rik" "Mapk4" "Ankra2"
[221] "Fzd1" "Ppm1d" "Rassf1" "Cacng4"
[225] "Cables1" "Gpr68" "Uhrf2" "Mall"
[229] "Epm2a" "Map3k21" "Ttc34" "LOC100862446"
[233] "Gpr137b" "Homer1" "Fam117a" "Cbln2"
[237] "Ccdc183" "P4ha1" "Usb1" "Plekhg5"
[241] "Mkl1" "Abhd6" "Ubox5" "Prkab1"
[245] "Irs2" "Dusp1" "Mak" "Kbtbd2"
[249] "Bend3" "Samd3" "Fam53b" "Zmym1"
[253] "Tigd2" "Mapk14" "Zbtb40" "Camk1g"
[257] "Rhobtb2" "Lsm11" "Ephb2" "St7"
[261] "Rhob" "1700029J07Rik" "6430571L13Rik" "Rnd3"
[265] "Pop1" "Dio2" "Arhgef7" "Cd276"
[269] "Rasd1" "Blm" "Trim62" "Asap2"
[273] "Cftr" "Omg" "Wisp1" "Ift52"
[277] "Ptpn1" "Gabrq" "Trim17" "Synpr"
[281] "Lss" "Actn2" "Meis1" "Galnt7"
[285] "Rbm43" "Pcsk9" "Wdcp" "Ammecr1"
[289] "Mta2" "Jade2" "Dbr1" "Prickle1"
[293] "Dhx32" "Rac2" "4930513N10Rik" "Haus3"
[297] "Gpt" "Dusp14" "Sgms1" "Strip2"
[301] "Pafah2" "Serinc2" "Cdh9" "Sema4c"
[305] "Adamts3" "Hmox1" "Gk5" "Apold1"
[309] "Morc2b" "Dars" "Nadk" "Cpne2"
[313] "Runx2" "Fbn1" "Cd302" "Adamts1"
[317] "Ubxn10" "Xbp1" "Pcp4l1" "Bcor"
[321] "Fam171a1" "Nub1" "Anxa3" "Bbs2"
[325] "Gart" "Lrsam1" "Sdr42e1" "Zfp438"
[329] "Spry2" "Wrb" "Traf3" "Nr4a1"
[333] "Nrde2" "Mfhas1" "Gjb6" "Zfp180"
[337] "Mtmr4" "Ppp2r5a" "Sdhaf3" "Sh2d5"
[341] "Zfp791" "Rfesd" "Arhgef3" "Pcdh20"
[345] "Tmcc1" "Zswim4" "Nipal2" "Tbx1"
[349] "Rfwd2" "Hcfc2" "Nprl3" "Zfp316"
[353] "Slfn2" "Usp43" "Bbs9" "Ppp1r13l"
[357] "Ccdc134" "Pxmp4" "Man2b2" "Cfap70"
[361] "Zkscan6" "Sgcd" "Zmat1" "Zc3h3"
[365] "Lgals12" "Sh3rf3" "Etv5" "Oxnad1"
[369] "Pgbd5" "Zfp277" "Ddx59" "Rassf6"
[373] "Nol10" "Dusp7" "Pid1" "Nxpe4"
[377] "Acin1" "Mmachc" "Ptprr" "Rimkla"
[381] "Wdsub1" "P2ry2" "Fam221a" "Pipox"
[385] "Sord" "Furin" "Cdkal1" "Etv6"
[389] "Creb5" "Kirrel3" "Fstl3" "Nos3"
[393] "Cdh4" "Ufd1" "Mocs1" "Rundc1"
[397] "Col11a1" "Ndufaf4" "Il21r" "Tesk2"
[401] "Pdyn" "Nt5dc1" "Gemin8" "Cd36"
[405] "Gm16299" "Srpk3" "Gm11696" "Gldn"
[409] "4933407L21Rik" "Spred3" "Pex2" "Trak1"
[413] "Ifit1bl1" "Mms22l" "Clspn" "Fra10ac1"
[417] "A130010J15Rik" "Rrs1" "Il17ra" "E2f4"
[421] "Dnajb5" "Eif4b" "Cbx8" "Cyr61"
[425] "Iqcc" "Kcna5" "Mus81" "Ptpn12"
[429] "Plpbp" "Fam46a" "Znrf1" "1110032F04Rik"
[433] "Ryk" "P4ha3" "Scube3" "Gtf2ird2"
[437] "Klf11" "Kifc1" "Zdhhc15" "2310057M21Rik"
[441] "Etfrf1" "Mpp6" "Cyb5d2" "Socs3"
[445] "Heatr1" "Bend7" "D10Wsu102e" "Gm9776"
[449] "Ccdc14" "Ttc38" "Gm14204" "Tbx3"
[453] "Tpbgl" "Mis12" "Fxr1" "Ccno"
[457] "BC048403" "Zfp810" "Fam196a" "Prkd2"
[461] "Papd7" "Slc9a8" "Tpm1" "Filip1"
[465] "Dnd1" "Rtn4rl2" "Top1" "Slc9a5"
[469] "Trim26" "Rassf5" "Tex10" "Agk"
[473] "Casp9" "Wnt9a" "Meltf" "Sh3yl1"
[477] "Rerg" "Scn4b" "Ak4" "Srarp"
[481] "Zbed4" "Rasip1" "Cacna2d2" "Evpl"
[485] "Ccl28" "Kcne1l" "Adnp2" "Tmem241"
[489] "Lfng" "Trpc7" "Max" "Zbtb25"
[493] "Mxi1" "Fam241a" "Nfam1" "H1f0"
[497] "Met" "Hpdl" "Inafm2" "Rwdd3"
[501] "Qtrt1" "Klhdc1" "Mtcl1" "Prkch"
[505] "Spred1" "1110008L16Rik" "Kcna4" "Dpf2"
[509] "Ppp6r1" "1700003D09Rik" "N6amt1" "Snx2"
[513] "Tob2" "Irs4" "Csmd2" "Dph6"
[517] "Tyw3" "Sp2" "Gramd1b" "Ccdc181"
[521] "Gfpt2" "Slc16a6" "Unc13c" "Ccl6"
[525] "Ccnd2" "Prpf3" "Cmtm3" "Zhx2"
[529] "Trim9" "Adprm" "Rflnb" "Aco1"
[533] "BC030500" "Mfsd4a" "Zc3h8" "Sox18"
[537] "Rps6ka5" "Slc9a9" "Sapcd2" "Atpaf1"
[541] "Vgf" "Brca2" "Hic1" "Rft1"
[545] "Slc35g2" "Cbwd1" "Lin9" "Hic2"
[549] "Tmem267" "Bcl2" "Btg2" "Unc45a"
[553] "Noxred1" "Dclk2" "Flcn" "E130317F20Rik"
[557] "Moap1" "Mettl2" "Cdkn1a" "B4galt5"
[561] "C530005A16Rik" "Rin2" "Kansl2" "Ap4b1"
[565] "Cyb5rl" "1700019D03Rik" "Nmb" "Utp18"
[569] "B3gntl1" "Gimap1" "Mamstr" "Cutc"
[573] "LOC100861615" "Ephb3" "Zfp119a" "Fbxo16"
[577] "Zscan18" "Ubc" "Nr6a1" "Araf"
[581] "Mlip" "Arhgap31" "Cdc42ep3" "Smim3"
[585] "Uspl1" "Sept9" "Zbtb16" "Ece1"
[589] "Bcl6" "Zfp202" "Tspan14" "Bloc1s5"
[593] "Arpin" "Wdr25" "Zbtb17" "Riox2"
[597] "Lrrc8d" "9630001P10Rik" "Rabl3" "Spry4"
[601] "Fndc8" "Cbx4" "Lhx2" "Zfp512"
[605] "Npbwr1" "Pdia4" "Baiap3" "Nlrx1"
[609] "Ttc26" "Uvssa" "Socs2"
$duration8wks
[1] "Gpr88" "Ido1" "Adck2" "Fos"
[5] "Sh2d3c" "Ldlr" "Pde10a" "4930432K21Rik"
[9] "Smad3" "Cwc25" "Lrrc61" "Per2"
[13] "Junb" "Ier2" "Ofd1" "Rtn4ip1"
[17] "Ybey" "Ccdc112" "3632451O06Rik" "Rbm11"
[21] "Myadml2" "B3galt6" "Zfp459" "Fam177a"
[25] "Igf2bp2" "Osbpl3" "Sco1" "Slc37a1"
[29] "Lpcat2" "Gpatch11" "Bace2" "Fam111a"
[33] "Slc10a7" "S1pr2" "Ninl" "Mtrf1"
[37] "Trim45" "Lrrc49" "Csrnp2" "Pdzd9"
[41] "Diexf" "Glrx2" "Tipin" "Phf21b"
[45] "Arhgap39" "Insig1" "Arl5c" "Tefm"
[49] "Fbxl4" "Mex3d" "A930017K11Rik" "Plekha3"
[53] "Plppr1" "Phf13" "2810029C07Rik" "Htr6"
[57] "Ciart" "Frmd3" "Parn" "Anapc4"
[61] "Ddx27" "Pms1" "Dtwd2" "Prr5"
[65] "Chaf1a" "Slc35f4" "Plin4" "Pik3c3"
[69] "Rin3" "Gm5083" "Zfp366" "Rars2"
[73] "Ifit1" "Tcerg1l" "E130102H24Rik" "Zfp930"
[77] "Fzd1" "Ppm1d" "Gtf2h1" "Pou6f2"
[81] "Epm2a" "A430108G06Rik" "Ttc34" "Ccdc183"
[85] "Usb1" "Dusp1" "Mak" "Samd3"
[89] "Tigd2" "Rnd3" "Cd276" "Trim62"
[93] "Prdm4" "Omg" "Ift52" "Tiparp"
[97] "Rbm43" "Mta2" "Dhx32" "Rac2"
[101] "4930513N10Rik" "Gpt" "Sgms1" "Plekho2"
[105] "Cog6" "Pum3" "Morc2b" "Dars"
[109] "Scpep1" "Dnttip2" "Prss57" "Nrap"
[113] "Nub1" "Anxa3" "Gart" "Sdr42e1"
[117] "Wrb" "Gm14440" "Traf3" "Nr4a1"
[121] "Gjb6" "Sdhaf3" "Tmcc1" "Nipal2"
[125] "Egr2" "Pak7" "Bbs9" "Pxmp4"
[129] "Zkscan6" "Exosc3" "Gspt2" "Oxnad1"
[133] "Zfp277" "Actr8" "Arhgap27" "Serpine1"
[137] "Smu1" "St7l" "P2ry2" "Golt1b"
[141] "Cdkal1" "Adcyap1" "Il21r" "Pdyn"
[145] "Eif4e2" "6430550D23Rik" "Spred3" "Mybpc1"
[149] "Ifit1bl1" "Fra10ac1" "Iqgap2" "Eif4b"
[153] "Pdhx" "Cbx8" "Kcna5" "Znrf1"
[157] "1110032F04Rik" "Hspa9" "Rabgef1" "Ggnbp2"
[161] "Zdhhc15" "Hvcn1" "Cyb5d2" "Ddx10"
[165] "Tbx3" "Rbm4" "Notch4" "Arntl"
[169] "Tpm1" "Xlr3a" "Trit1" "Agk"
[173] "Rad17" "Casp9" "Wnt9a" "Sh3yl1"
[177] "Hnrnpa1" "Zbed4" "Rasip1" "Adnp2"
[181] "Gm12522" "Lfng" "Loxl3" "Ell"
[185] "Nfam1" "H1f0" "Met" "Klhdc1"
[189] "Eed" "Zfp874b" "Snx2" "Capn11"
[193] "Ccdc181" "Ccl6" "Nt5c1a" "Cmtm3"
[197] "BC030500" "Zc3h8" "Sox18" "Sapcd2"
[201] "Atpaf1" "Slc35g2" "Wdr92" "Cbwd1"
[205] "Hic2" "Noxred1" "Moap1" "Fmo2"
[209] "Mettl2" "Atg14" "C530005A16Rik" "Ap4b1"
[213] "Vps37b" "Ing2" "Sema5b" "Cutc"
[217] "LOC100861615" "Zscan18" "Ubc" "Nop14"
[221] "Araf" "Mlip" "Aspa" "Zfp7"
[225] "Bloc1s5" "Cluap1" "Arpin" "Riox2"
[229] "Rabl3" "Lhx2" "Chsy1" "Zfp512"
# To classify genes in more detail, we collapsed the binarized matrix gene by gene and grouped genes from the unique collapsed patterns.
head(stat_binary)
colnames(stat_binary) <- c("Intercept", "duration4wks", "duration6wks", "duration8wks")
#stat_binary <- stat_binary %>% add_column(Symbol=stat_table_all$Symbol, .before=1)
stat_binary <- stat_binary %>% mutate(pattern= paste(Intercept,duration4wks,duration6wks, duration8wks ,sep = "_"))
pattern_TF <- table(stat_binary$pattern) # %>% as.data.frame()
stat_binary.gather <- stat_binary %>% gather(key=coeff_group, value=value, -pattern)
stat_binary.gather$coeff_group <- factor(stat_binary.gather$coeff_group,
levels = c("Intercept","duration4wks", "duration6wks", "duration8wks"),
ordered = TRUE)
ggplot(stat_binary.gather, aes(x = coeff_group, y = pattern, fill = value)) + geom_tile(colour = "white") +
theme_bw() + xlab("") + ylab("") + coord_flip() +
# scale_x_discrete(labels = c("Intercept", "duration4wks", "duration6wks", "duration8wks")) +
scale_y_discrete(labels = pattern_TF) +
scale_fill_manual(values = c("grey80", "firebrick1"))

Hierarchical Clustering of all the samples based on Top 100 genes from GLM
- heatmap with ordering the sample by their treatment
- heatmap without ordering the sample (column)
# heatmap
logCPM <- log.cpm
rownames(logCPM) <- y$genes$Symbol
colnames(logCPM) <- data.design$sample_short # get it into the y project
logCPM<- logCPM[stat_table_all$Symbol[1:100],]
# get rid of the running group
logCPM<- t(scale(t(logCPM))) %>% as.data.frame() %>% as.matrix()
logCPM.order <- logCPM[, colnames(logCPM) %>% order()]
library(gplots)
#heatmap, columns ordered by their group/treatment
col.pan <- colorpanel(100, "blue", "white", "red")
heatmap.2(logCPM.order, col=col.pan, Rowv=TRUE, Colv= FALSE, scale="none",
trace="none", dendrogram="row", cexRow=0.5, cexCol=0.7, density.info="none",
margin=c(9,4), lhei=c(2,10), lwid=c(2,6))

#heatmap, columns ordered by the clustering
col.pan <- colorpanel(100, "blue", "white", "red")
heatmap.2(logCPM.order, col=col.pan, Rowv=TRUE, scale="none",
trace="none", dendrogram="both", cexRow=0.5, cexCol=0.7, density.info="none",
margin=c(9,4), lhei=c(2,10), lwid=c(2,6))

GO and Kegg pathway analysis
## ---- GO and Kegg pathway analysis
# Xulong's function
source("~/Documents/code/01_function/Kegg_function.R")
gk.diet <- myGK(stat_table_all$Symbol)
head(gk.diet$BP[, c("Term", "Pvalue")])
head(gk.diet$MF[, c("Term", "Pvalue")])
head(gk.diet$CC[, c("Term", "Pvalue")])
gk.diet$KEGG[, c("Term", "Pvalue")]
# output the analysis result
#file_name<- list("./analysis/Cortex_BP.txt", "./analysis/Cortex_MF.txt", "./analysis/Cortext_CC.txt", "./analysis/Cortext_KEGG.txt")
#walk2(gk.diet, file_name, write.table, sep="\t", row=F, quote=F)
#write.table(stat_table_all, "./analysis/diet_gene.txt", sep="\t", row=F, quote=F)
sessionInfo()
R version 3.4.3 (2017-11-30)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: OS X El Capitan 10.11.6
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] grid parallel stats4 stats graphics grDevices utils
[8] datasets methods base
other attached packages:
[1] GO.db_3.5.0 pathview_1.18.2 org.Hs.eg.db_3.5.0
[4] org.Mm.eg.db_3.5.0 KEGG.db_3.2.3 GOstats_2.44.0
[7] graph_1.56.0 Category_2.44.0 Matrix_1.2-12
[10] AnnotationDbi_1.40.0 VennDiagram_1.6.19 futile.logger_1.4.3
[13] gplots_3.0.1 corrplot_0.84 IRanges_2.12.0
[16] S4Vectors_0.16.0 Biobase_2.38.0 BiocGenerics_0.24.0
[19] edgeR_3.20.9 limma_3.34.9 bindrcpp_0.2
[22] forcats_0.3.0 stringr_1.3.0 dplyr_0.7.4
[25] purrr_0.2.4 readr_1.1.1 tidyr_0.8.0
[28] tibble_1.4.2 ggplot2_2.2.1 tidyverse_1.2.1
[31] knitr_1.20 BiocStyle_2.6.1
loaded via a namespace (and not attached):
[1] nlme_3.1-131 bitops_1.0-6 lubridate_1.7.3
[4] bit64_0.9-7 httr_1.3.1 rprojroot_1.3-2
[7] Rgraphviz_2.22.0 tools_3.4.3 backports_1.1.2
[10] R6_2.2.2 KernSmooth_2.23-15 DBI_0.8
[13] lazyeval_0.2.1 colorspace_1.3-2 tidyselect_0.2.4
[16] mnormt_1.5-5 bit_1.1-12 compiler_3.4.3
[19] cli_1.0.0 rvest_0.3.2 xml2_1.2.0
[22] labeling_0.3 KEGGgraph_1.38.1 caTools_1.17.1
[25] scales_0.5.0 psych_1.7.8 genefilter_1.60.0
[28] RBGL_1.54.0 digest_0.6.15 foreign_0.8-69
[31] rmarkdown_1.9 XVector_0.18.0 AnnotationForge_1.20.0
[34] pkgconfig_2.0.1 htmltools_0.3.6 rlang_0.2.0
[37] readxl_1.0.0 rstudioapi_0.7 RSQLite_2.0
[40] bindr_0.1.1 jsonlite_1.5 gtools_3.5.0
[43] RCurl_1.95-4.10 magrittr_1.5 Rcpp_0.12.16
[46] munsell_0.4.3 stringi_1.1.7 yaml_2.1.18
[49] zlibbioc_1.24.0 plyr_1.8.4 blob_1.1.0
[52] gdata_2.18.0 crayon_1.3.4 lattice_0.20-35
[55] Biostrings_2.46.0 haven_1.1.1 splines_3.4.3
[58] annotate_1.56.1 KEGGREST_1.18.1 hms_0.4.2
[61] locfit_1.5-9.1 pillar_1.2.1 reshape2_1.4.3
[64] futile.options_1.0.0 XML_3.98-1.10 glue_1.2.0
[67] evaluate_0.10.1 lambda.r_1.2 modelr_0.1.1
[70] png_0.1-7 cellranger_1.1.0 gtable_0.2.0
[73] assertthat_0.2.0 xtable_1.8-2 broom_0.4.3
[76] survival_2.41-3 memoise_1.1.0 statmod_1.4.30
[79] GSEABase_1.40.1